# -*- coding: utf-8 -*-
import random
import scrapy
from scrapy import Request
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from eavic.rules import *
from zc_core.dao.sku_dao import SkuDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from datetime import datetime
from scrapy.utils.project import get_project_settings
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full-crawl spider for mall.eavic.com.

    Iterates the SKU list of the current batch and issues one detail-page
    request per SKU, skipping SKUs that are long offline or already collected
    in this batch.
    """

    name = 'full'

    # Product detail page URL template (filled with the SKU id).
    item_url = 'https://mall.eavic.com/products/{}'

    # Price lookup URL template.
    price_url = 'https://mall.eavic.com/products/{}/get_price?id={}'

    # Regional stock-check URL; area_id=-1 presumably means "all regions" —
    # TODO confirm against the site API.
    area_stock_url = 'https://mall.eavic.com/products/{}/check_stock?area_id=-1'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the spider, register the batch and set up the done-filter.

        :param batchNo: batch identifier forwarded to BaseSpider; BaseSpider
            is assumed to derive ``self.batch_no`` from it — TODO confirm.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Record this batch so the run is tracked downstream.
        BatchDao().create_batch(self.batch_no)
        # Filter of SKUs already collected in this batch, to avoid re-crawling.
        self.done_filter = DoneFilter(self.batch_no)

    def _build_list_req(self, sku_id, url, callback):
        """Build a detail-page Request carrying batch/SKU context in meta.

        ``dont_filter=True`` bypasses Scrapy's duplicate filter — dedup is
        handled by ``self.done_filter`` instead.
        """
        return Request(
            url=url,
            callback=callback,
            errback=self.error_back,
            meta={
                'reqType': 'item',
                'batchNo': self.batch_no,
                'skuId': sku_id,
            },
            dont_filter=True,
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42'
            },
        )

    def start_requests(self):
        """Yield one detail-page request per eligible SKU of the batch.

        Eligibility: SKU passes the optional catalog white list, has not been
        offline more than MAX_OFFLINE_TIME times, and is not already collected
        (unless FORCE_RECOVER is set).
        """
        settings = get_project_settings()
        fields = {"_id": 1, "offlineTime": 1}
        # Fix: original local was misspelled "while_list"; the setting key is
        # CATALOG_WHITE_LIST.
        white_list = settings.get("CATALOG_WHITE_LIST")
        if white_list:
            pool_list = SkuDao().get_batch_sku_list(self.batch_no, fields=fields,
                                                    query={"$or": white_list})
        else:
            pool_list = SkuDao().get_batch_sku_list(self.batch_no, fields=fields)
        self.logger.info('全量：%s', len(pool_list))
        # Shuffle so partial runs/retries do not always hit the same prefix.
        random.shuffle(pool_list)
        # Hoist loop-invariant settings lookups out of the loop.
        max_offline = settings.get('MAX_OFFLINE_TIME', 2)
        force_recover = settings.get('FORCE_RECOVER', False)
        for sku in pool_list:
            sku_id = sku.get('_id')
            offline_time = sku.get('offlineTime', 0)
            # Skip SKUs that have been seen offline too many times.
            if offline_time > max_offline:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Skip SKUs already collected in this batch, unless forced.
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: [%s]', sku_id)
                continue
            # Crawl the product detail page.
            yield self._build_list_req(sku_id, self.item_url.format(sku_id), self.parse_item_data)

    def parse_item_data(self, response):
        """Parse one product detail page and yield the extracted item.

        NOTE(review): the call below resolves to the module-level
        ``parse_item_data`` brought in via ``from eavic.rules import *`` —
        a method name does not shadow globals inside its own body, but the
        identical names are confusing; consider renaming the helper.
        """
        data = parse_item_data(response)
        self.logger.info('商品: [%s]', data.get('skuId'))
        yield data
